---

components:
  # Cleaning stage (DocumentCleaner): empty-line and extra-whitespace removal
  # are switched on; regex, substring and repeated-substring removal are not
  # configured.
  cleaner:
    type: haystack.components.preprocessors.document_cleaner.DocumentCleaner
    init_parameters:
      remove_empty_lines: true
      remove_extra_whitespaces: true
      remove_repeated_substrings: false
      remove_regex: null
      remove_substrings: null

  # Conversion stage (TextFileToDocument), configured with utf-8 encoding.
  # It only appears as a sender in `connections`, so it is the pipeline input.
  converter:
    type: haystack.components.converters.txt.TextFileToDocument
    init_parameters:
      encoding: utf-8

  # Embedding stage (OpenAIDocumentEmbedder) using model text-embedding-ada-002.
  embedder:
    type: haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder
    init_parameters:
      api_base_url: null
      # The key is declared as an env_var secret that names OPENAI_API_KEY;
      # no credential value is stored in this file.
      api_key:
        env_vars:
        - OPENAI_API_KEY
        strict: true
        type: env_var
      batch_size: 32
      dimensions: null
      # Must be double-quoted: in YAML only double-quoted scalars process
      # backslash escapes. Single quotes would produce the two characters
      # backslash + "n" instead of a newline.
      embedding_separator: "\n"
      meta_fields_to_embed: []
      model: text-embedding-ada-002
      organization: null
      prefix: ''
      progress_bar: true
      suffix: ''

  # Splitting stage (DocumentSplitter): split_by word, split_length 200,
  # split_overlap 0.
  splitter:
    type: haystack.components.preprocessors.document_splitter.DocumentSplitter
    init_parameters:
      split_by: word
      split_length: 200
      split_overlap: 0

  # Writing stage (DocumentWriter) backed by an InMemoryDocumentStore.
  # It only appears as a receiver in `connections`, so it is the final stage.
  writer:
    type: haystack.components.writers.document_writer.DocumentWriter
    init_parameters:
      # Quoted so the value is unambiguously the string NONE.
      policy: 'NONE'
      document_store:
        type: haystack.document_stores.in_memory.document_store.InMemoryDocumentStore
        init_parameters:
          index: documents
          embedding_similarity_function: dot_product
          bm25_algorithm: BM25L
          bm25_parameters: {}
          # Single-quoted so the backslashes in the regex stay literal.
          bm25_tokenization_regex: '(?u)\b\w\w+\b'

# Linear data flow, each edge carrying the `documents` socket:
# converter -> cleaner -> splitter -> embedder -> writer
connections:
- sender: converter.documents
  receiver: cleaner.documents
- sender: cleaner.documents
  receiver: splitter.documents
- sender: splitter.documents
  receiver: embedder.documents
- sender: embedder.documents
  receiver: writer.documents

# Pipeline-level metadata; intentionally an empty mapping here.
metadata: {}
